In [ ]:
 
In [2]:
import scipy
import scanpy as sc
import pandas as pd
import numpy as np
import stlearn as st
import matplotlib.pyplot as plt
import os
In [22]:
os.chdir("/QRISdata/Q4386/curio/breast/Human_breast_cancer_3by3_v1pt1/")
In [2]:
os.chdir("/scratch/project/stseq/Prakrithi/curio/Human_breast_cancer_3by3_v1pt1_input/Human_breast_cancer_3by3_v1pt1")
In [2]:
os.chdir("/scratch/project/stseq/Prakrithi/curio/Human_breast_cancer_3by3_v1pt1_input/Human_breast_cancer_3by3_v1pt1")
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
Cell In[2], line 1
----> 1 os.chdir("/scratch/project/stseq/Prakrithi/curio/Human_breast_cancer_3by3_v1pt1_input/Human_breast_cancer_3by3_v1pt1")

FileNotFoundError: [Errno 2] No such file or directory: '/scratch/project/stseq/Prakrithi/curio/Human_breast_cancer_3by3_v1pt1_input/Human_breast_cancer_3by3_v1pt1'
In [23]:
uA=np.transpose(pd.read_csv('uTARs_mincell_filtered_mat_final.txt', sep="\t", header=0, index_col=0))
spatialuA=pd.read_csv('spatial_row_col', sep="\t", header=0)
breast_uTARs = st.create_stlearn(count=uA,spatial=spatialuA,library_id="A",scale=0.248)
In [40]:
st.pl.QC_plot(breast_uTARs, spot_size=(1,2), tissue_alpha=0.4, dpi=900,cmap="Reds")#,name="MBA_uTARQC.pdf", output="/QRISdata/Q4386/polyA_FreshFrozen/QCplots/") 
No description has been provided for this image
In [57]:
sc.pl.spatial(breast_uTARs,alpha_img=0.6, color =['chr5_11931649_11933299_+_783_0','chr18_63063649_63064549_-_5375_0','chr17_49855249_49858099_+_262_0','chr6_6676899_6678299_-_97_0',
                                           'chr1_91049599_91049999_+_7710_0','chr10_37391399_37422749_+_39822_0','chr1_31439949_31455549_-_5769_0'],cmap='Reds',spot_size=50, vmax=0.1)

#uncharacterized LOC107984223 chr10_37391399_37422749_+_39822_0
No description has been provided for this image
In [ ]:
sc.pl.spatial(breast_uTARs,alpha_img=0.6, color =['chr21_43784699_43785949_+_29_0','chr6_6676899_6678299_-_97_0','chr1_91050599_91051949_+_62_0'],cmap='Reds',spot_size=50, vmax=0.1)

#uncharacterized LOC107984223 chr10_37391399_37422749_+_39822_0
In [21]:
row_idx = uAs.var_names.get_loc('chr5_11931649_11933299_+_783_0')

Spatially variable uTARs¶

In [24]:
sc.pl.spatial(breast_uTARs,alpha_img=0.6, color =['chr13_22998649_23006399_-_10358_0','chr4_124305749_124320749_+_8075_0',
'chr2_105223299_105224399_+_2070_0',
'chr17_21966899_21984599_+_24995_0',
'chr20_31161799_31191899_+_9716_0'],cmap='Reds',spot_size=50, vmax=0.1)
No description has been provided for this image
In [27]:
import matplotlib.pyplot as plt

with plt.rc_context({"figure.figsize": (8, 8), "figure.dpi": (300)}):
    sc.pl.spatial(breast_uTARs,alpha_img=0.6, color =['chr2_105223299_105224399_+_2070_0'],cmap='inferno',spot_size=50, vmax=0.1, show=False)

    plt.savefig("/scratch/project/stseq/Prakrithi/STOMICS/cuTAR144737_curio_breast.pdf", bbox_inches="tight")
No description has been provided for this image

Genes¶

In [21]:
uAg=np.transpose(pd.read_csv('genes_mincell_filtered_matrix.txt', sep="\t", header=0, index_col=0))
spatialuAg=pd.read_csv('spatial_row_col_genes', sep="\t", header=0)
breast_genes = st.create_stlearn(count=uAg,spatial=spatialuAg,library_id="A",scale=0.248)
In [4]:
st.pl.QC_plot(breast_genes, spot_size=(1,2), tissue_alpha=0.4, dpi=900,cmap="Reds")
No description has been provided for this image
In [10]:
sc.pl.spatial(breast_genes,alpha_img=0.6, color =['GAPDH','MALAT1','EPCAM','IGKC'],cmap='Spectral_r',spot_size=30,vmax=10)
No description has been provided for this image
In [11]:
sc.pl.spatial(breast_genes,alpha_img=0.6, color =['TINCR', 'IRAG2','MGP','CYB561','NME1','TOB1','CLU','CR2'],cmap='Spectral_r',spot_size=30,vmax=2)
No description has been provided for this image
In [26]:
sc.pl.spatial(breast_genes,alpha_img=0.6, color =['TINCR', 'IRAG2','MGP','CYB561'],cmap='Spectral_r', spot_size=30)
No description has been provided for this image
In [25]:
sc.pl.spatial(breast_genes,alpha_img=0.6, color =['BIRC5'],cmap='Reds',vmax=2, spot_size=30)
No description has been provided for this image

Curio pipeline genes¶

In [6]:
curio_genes = sc.read_h5ad('../../Human_breast_cancer_3by3_v1pt1/Human_breast_cancer_v1pt1_anndata.h5ad')
sc.pl.spatial(curio_genes,alpha_img=0.6, color =['TINCR', 'IRAG2','MGP','CYB561'],cmap='Spectral_r',vmax=2, spot_size=30)
No description has been provided for this image
In [5]:
sc.pl.spatial(curio_genes,alpha_img=0.6, color =['GAPDH','MALAT1','EPCAM','IGKC'],cmap='Spectral_r',vmax=10, spot_size=30)
No description has been provided for this image

SpatDE¶

In [1]:
import NaiveDE
import SpatialDE
import anndata as ad
import numpy as np
In [ ]:
# Assuming `adata` is your AnnData object containing Visium data
# Calculate total counts for each barcode (spot)
breast_uTARs.obs['total_counts'] = breast_uTARs.X.sum(axis=1)

# Display the first few total counts
print(breast_uTARs.obs['total_counts'].head())
TTGGTCAAACGGGG    100
CGTCTGCAGGCCAA     76
AATTCACGCCTACC     95
GCTAGAGCAGATCG     46
CAGCAAGAGCCTCC     80
Name: total_counts, dtype: int64
In [41]:
counts = uA.T[uA.sum(0) >= 3].T
counts
Out[41]:
GENE chr10_100022799_100025349_+_62_0 chr10_100029249_100032199_+_103_0 chr10_100096949_100098099_+_40_0 chr10_100145399_100146199_-_63_0 chr10_100186099_100187449_+_102_0 chr10_100484449_100486349_+_39_0 chr10_100563349_100565049_+_136_0 chr10_100568299_100569549_+_31_0 chr10_100570899_100572699_+_162_0 chr10_100573249_100575549_-_31_0 ... chrY_56825699_56837199_-_1069_0 chrY_56867549_56869199_-_126_0 chrY_56875449_56876599_+_59_0 chrY_56880799_56881949_+_52_0 chrY_56883199_56886099_+_135_0 chrY_6749099_6749849_+_51_0 chrY_7136299_7137599_+_74_0 chrY_7443299_7443549_-_45_0 chrY_8232549_8232699_-_181_0 chrY_9629449_9631199_+_33_0
TTGGTCAAACGGGG 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
CGTCTGCAGGCCAA 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
AATTCACGCCTACC 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
GCTAGAGCAGATCG 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
CAGCAAGAGCCTCC 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
TTGCGCCGCAGTCG 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
GTGTTGAGCTCCGA 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
TTTTTTCCTCGCAG 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
TTTAGGTATCTCAA 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
TCCCGCTAGCAAAG 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0

51174 rows × 27401 columns

In [8]:
counts=np.transpose(pd.read_csv('uTARs_in_Alex_samples_mat_mincell.txt', sep="\t", header=0, index_col=0))
counts = counts.T[counts.sum(0) >= 3].T
counts.head()
Out[8]:
GENE chr10_100022799_100025349_+_62_0 chr10_100096949_100098099_+_40_0 chr10_100145399_100146199_-_63_0 chr10_100186099_100187449_+_102_0 chr10_100484449_100486349_+_39_0 chr10_100568299_100569549_+_31_0 chr10_100570899_100572699_+_162_0 chr10_100896449_100900849_-_73_0 chr10_101043349_101043799_+_29_0 chr10_101053899_101057549_+_287_0 ... chrY_14180649_14180799_-_83_0 chrY_15030199_15032799_-_28_0 chrY_15882949_15883099_-_5492_0 chrY_15896049_15896899_+_3426_0 chrY_16912899_16915549_+_83_0 chrY_22274399_22277699_+_108_0 chrY_56875449_56876599_+_59_0 chrY_56883199_56886099_+_135_0 chrY_6749099_6749849_+_51_0 chrY_8232549_8232699_-_181_0
TTGGTCAAACGGGG 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
CGTCTGCAGGCCAA 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
AATTCACGCCTACC 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 1 0 0 0 0 0
GCTAGAGCAGATCG 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
CAGCAAGAGCCTCC 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0

5 rows × 12324 columns

In [9]:
counts=counts+1
In [9]:
print(counts.shape)
print(sample_info.shape)
print(norm_expr.shape)
(51174, 12324)
(51174, 3)
(51174, 12324)
In [20]:
pip show numpy
WARNING: Ignoring invalid distribution -cikit-learn (/home/s4716765/.local/lib/python3.8/site-packages)
Name: numpy
Version: 1.21.6
Summary: NumPy is the fundamental package for array computing with Python.
Home-page: https://www.numpy.org
Author: Travis E. Oliphant et al.
Author-email: 
License: BSD
Location: /home/s4716765/.local/lib/python3.8/site-packages
Requires: 
Required-by: access, anndata, biopython, bokeh, contourpy, gensim, geosketch, h5py, harmonypy, imageio, inequality, Keras-Applications, Keras-Preprocessing, libpysal, lightning, mapclassify, matplotlib, mgwr, mudata, muon, NaiveDE, numba, opt-einsum, pandas, patsy, pointpats, pygeos, PyWavelets, quantecon, rasterio, rasterstats, scanorama, scanpy, scikit-image, scikit-learn, scipy, seaborn, segregation, shapely, snuggs, spaghetti, SpatialDE, spglm, spint, splot, spopt, spreg, spvcm, statsmodels, stlearn, tensorboard, tensorflow, tifffile, tobler, torchmetrics, umap-learn, xgboost, yoctol-keras-layer-zoo
Note: you may need to restart the kernel to use updated packages.
In [ ]:
 
In [2]:
import sys
print(sys.executable)
print("Numpy location:", np.__file__)
/home/s4716765/.conda/envs/stlearn_heart/bin/python
Numpy location: /home/s4716765/.local/lib/python3.8/site-packages/numpy/__init__.py
In [14]:
sample_info = pd.read_csv('spatDE_metadata_mingene3.txt',sep="\t", header=0, index_col=0) # ERROR CAZ SOME CELLS HAD 0 COUNTS. REMOVE THOSE CELLS IN THAT CASE
counts=counts.loc[counts.index.intersection(sample_info.index)]
counts
Out[14]:
GENE chr10_100022799_100025349_+_62_0 chr10_100096949_100098099_+_40_0 chr10_100145399_100146199_-_63_0 chr10_100186099_100187449_+_102_0 chr10_100484449_100486349_+_39_0 chr10_100568299_100569549_+_31_0 chr10_100570899_100572699_+_162_0 chr10_100896449_100900849_-_73_0 chr10_101043349_101043799_+_29_0 chr10_101053899_101057549_+_287_0 ... chrY_14180649_14180799_-_83_0 chrY_15030199_15032799_-_28_0 chrY_15882949_15883099_-_5492_0 chrY_15896049_15896899_+_3426_0 chrY_16912899_16915549_+_83_0 chrY_22274399_22277699_+_108_0 chrY_56875449_56876599_+_59_0 chrY_56883199_56886099_+_135_0 chrY_6749099_6749849_+_51_0 chrY_8232549_8232699_-_181_0
TTGGTCAAACGGGG 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
CGTCTGCAGGCCAA 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
AATTCACGCCTACC 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 2 1 1 1 1 1
GCTAGAGCAGATCG 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
CAGCAAGAGCCTCC 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
TTCGGCGAGCGGTC 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
AACCGGTGTCCACA 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
CGCCAGAGTCCTAA 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
TGAGACTCTATTTA 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
ATGCAACGCGCGTT 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1

23910 rows × 12324 columns

In [15]:
norm_expr = NaiveDE.stabilize(counts.T).T
resid_expr = NaiveDE.regress_out(sample_info, norm_expr.T, 'np.log(total_counts)').T
In [20]:
sample_resid_expr = resid_expr.sample(n=1000, axis=1, random_state=1)
X = sample_info[['x', 'y']]
#results = SpatialDE.run(X, sample_resid_expr)
results = SpatialDE.run(np.array(X), sample_resid_expr)
Models:   0%|          | 0/10 [00:00<?, ?it/s]
  0%|          | 0/1000 [00:00<?, ?it/s]
  0%|          | 0/1000 [00:00<?, ?it/s]
  0%|          | 0/1000 [00:00<?, ?it/s]
  0%|          | 0/1000 [00:00<?, ?it/s]
  0%|          | 0/1000 [00:00<?, ?it/s]
  0%|          | 0/1000 [00:00<?, ?it/s]
  0%|          | 0/1000 [00:00<?, ?it/s]
  0%|          | 0/1000 [00:00<?, ?it/s]
  0%|          | 0/1000 [00:00<?, ?it/s]
  0%|          | 0/1000 [00:00<?, ?it/s]
In [21]:
results.head()
Out[21]:
FSV M g l max_delta max_ll max_mu_hat max_s2_t_hat model n s2_FSV s2_logdelta time BIC max_ll_null LLR pval qval
0 0.999955 4 chr16_48493499_48494949_+_72_0 0.70767 0.000045 55677.911720 -0.694482 0.482812 SE 23910 1492.340209 5.243904e+11 0.004383 -111315.495231 55677.897630 0.014089 0.905515 0.949501
1 0.999955 4 chr8_56296449_56297249_-_33_0 0.70767 0.000045 64756.456167 -0.694383 0.482379 SE 23910 1515.696452 5.325975e+11 0.004405 -129472.584126 64756.442302 0.013865 0.906266 0.949501
2 0.999955 4 chr8_89367849_89369949_-_161_0 0.70767 0.000045 52682.399520 -0.696059 0.485163 SE 23910 1526.864389 5.365218e+11 0.004248 -105324.470832 52682.385705 0.013815 0.906434 0.949501
3 0.999955 4 chr2_42764899_42766849_+_99_0 0.70767 0.000045 64759.075312 -0.694854 0.483032 SE 23910 1512.995851 5.316485e+11 0.004197 -129477.822416 64759.061420 0.013892 0.906176 0.949501
4 0.999955 4 chr8_33307899_33308549_-_93_0 0.70767 0.000045 67773.092187 -0.695065 0.483269 SE 23910 1580.821025 5.554815e+11 0.004241 -135505.856166 67773.078814 0.013373 0.907937 0.949501
In [22]:
results.to_csv('Breast_SpatDE_results.txt', sep='\t', index=False)
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [12]:
design_matrix = np.vstack([np.ones(sample_info.shape[0]), np.log(sample_info['total_counts'])]).T
In [13]:
design_matrix
Out[13]:
array([[1.        , 4.60517019],
       [1.        , 4.33073334],
       [1.        , 4.55387689],
       ...,
       [1.        ,       -inf],
       [1.        ,       -inf],
       [1.        ,       -inf]])
In [ ]:
sample_resid_expr = resid_expr.sample(n=10000, axis=1, random_state=1)
X = sample_info[['x', 'y']]
results = SpatialDE.run(X, sample_resid_expr)
In [39]:
sample_info
Out[39]:
total_counts x y
GENE
TTGGTCAAACGGGG 100 711.0160 727.1112
CGTCTGCAGGCCAA 76 924.4944 425.7168
AATTCACGCCTACC 95 781.6960 1334.8104
GCTAGAGCAGATCG 46 373.5128 401.0656
CAGCAAGAGCCTCC 80 1119.9680 612.0392
... ... ... ...
TTGCGCCGCAGTCG 0 1141.9656 1294.4112
GTGTTGAGCTCCGA 0 657.1008 452.1040
TTTTTTCCTCGCAG 0 1005.8632 533.3488
TTTAGGTATCTCAA 0 619.0328 522.8832
TCCCGCTAGCAAAG 0 1130.3840 1240.7440

51174 rows × 3 columns

Moran's¶

In [3]:
m=np.transpose(pd.read_csv('gene_uTAR_merged.txt', sep="\t", header=0, index_col=0))
spatialm=pd.read_csv('spatial_row_col', sep="\t", header=0)
gene_uTAR_merged = st.create_stlearn(count=m,spatial=spatialm,library_id="A",scale=0.248)
In [4]:
import joblib
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
#!pip install pysal
import pysal
from pysal.explore import esda
import pysal.lib as lps
from esda.moran import Moran, Moran_Local, Moran_BV, Moran_Local_BV
import splot
from splot.esda import moran_scatterplot, plot_moran, lisa_cluster, plot_moran_bv_simulation, plot_moran_bv, plot_local_autocorrelation
from libpysal.weights.contiguity import Queen
from libpysal import examples
import numpy as np
import os
import scanpy as sc
import stlearn as st
/home/s4716765/.local/lib/python3.8/site-packages/geopandas/_compat.py:124: UserWarning: The Shapely GEOS version (3.11.2-CAPI-1.17.2) is incompatible with the GEOS version PyGEOS was compiled with (3.10.4-CAPI-1.16.2). Conversions between both will be slow.
  warnings.warn(
/scratch/temp/10916475/ipykernel_1753339/3900667329.py:3: DeprecationWarning: Shapely 2.0 is installed, but because PyGEOS is also installed, GeoPandas still uses PyGEOS by default. However, starting with version 0.14, the default will switch to Shapely. To force to use Shapely 2.0 now, you can either uninstall PyGEOS or set the environment variable USE_PYGEOS=0. You can do this before starting the Python process, or in your code before importing geopandas:

import os
os.environ['USE_PYGEOS'] = '0'
import geopandas

In the next release, GeoPandas will switch to using Shapely by default, even if PyGEOS is installed. If you only have PyGEOS installed to get speed-ups, this switch should be smooth. However, if you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).
  import geopandas as gpd
/home/s4716765/.local/lib/python3.8/site-packages/spaghetti/network.py:40: FutureWarning: The next major release of pysal/spaghetti (2.0.0) will drop support for all ``libpysal.cg`` geometries. This change is a first step in refactoring ``spaghetti`` that is expected to result in dramatically reduced runtimes for network instantiation and operations. Users currently requiring network and point pattern input as ``libpysal.cg`` geometries should prepare for this simply by converting to ``shapely`` geometries.
  warnings.warn(dep_msg, FutureWarning, stacklevel=1)
In [5]:
#spatial.iloc[1:5,:]
#spatial=spatial.set_index('BC')
spatialm=pd.read_csv('spatial_row_col', sep="\t", header=0)
spatialm.index =gene_uTAR_merged.obs.index
#data.obs=spatial.iloc[:,[0,1,2,3,4]]


from typing import Literal

library_id='A'
#data.obs.iloc[1:5,:]
_QUALITY = Literal["fulres", "hires", "lowres"]
quality: _QUALITY = "hires"
scale = gene_uTAR_merged.uns["spatial"][library_id]["scalefactors"][
            "tissue_" + quality + "_scalef"]

image_coor = gene_uTAR_merged.obsm["spatial"] * scale
spatialm["imagecol"] = image_coor[:, 1]*0.208
spatialm["imagerow"] = image_coor[:, 0]*0.208
#s4 0.208 s6 0.2025
 
gene_uTAR_merged.obsm["gpd"] = gpd.GeoDataFrame(spatialm.iloc[:,:],geometry=gpd.points_from_xy(y=spatialm.imagerow,x=spatialm.imagecol))
In [31]:
import anndata

# Assuming spatial_df is your DataFrame and adata is your AnnData object

# 1. Get the row names of spatial_df
matching_cells = sample_info.index

# 2. Subset the AnnData object
gene_uTAR_mergedn = gene_uTAR_merged[gene_uTAR_merged.obs_names.isin(matching_cells)].copy()

# Now adata_subset contains only the observations that match the row names of spatial_df
In [64]:
## CALC MORAN'S INDEX
epr_df = gene_uTAR_merged.to_df()


#gene1_name="IRAK3"#"MSRB3"#"PFN1"#"ATOX1"
#"VHL"#"PTEN"#"TMSB10"#"ITM2B" #"PKM" #"ANXA2"  #CCND1:chr19_9687799_9691349_-_3392_0
#gene2_name="12_66056899_66058349_+_736630_0"#"X_44794599_44795299_+_146087_0"#"18_63063049_63063849_-_11504_0"#"5_18162949_18163149_+_4437_0"
gene1_name="FUS"#""##"HNRNPD"#"FUS"#"ELAVL1"#"TIA1"#"EWSR1"#"TNRC6A"#"IGF2BP2"#"AGO4"#"AGO3"#"PTBP1"#"TAF15"#"IGF2BP1"#"SRSF1"#"IGF2BP3"#"FUS"#"TARDBP"#"AGO2" "AGO1"#"TAF15"#"MOV10"
gene2_name="chr10_37391399_37422749_+_39822_0"#"12_66057549_66058349_+_212175_0"#"1_91048949_91049999_+_26510_0"#""#"12_66056899_66058349_+_736630_0"#"5_18162949_18163149_+_4437_0"
# AKA cuTAR33004 cuTAR32998
gene_uTAR_merged.obsm["gpd"][gene1_name] = epr_df[gene1_name]
#y = pd.DataFrame(y[:,:1])
gene_uTAR_merged.obsm["gpd"][gene2_name] = epr_df[gene2_name]
w = Queen.from_dataframe(gene_uTAR_merged.obsm["gpd"])
x=np.array(epr_df[gene1_name]).astype(int) #need to set type to int or float
y=np.array(epr_df[gene2_name]).astype(int)

#calculate moran index
moran = Moran(y,w)
moran_loc_bv = Moran_Local_BV(y, x, w)

fig, ax = plt.subplots(figsize=(4,18))
moran_scatterplot(moran_loc_bv, p=0.05, ax=ax)
ax.set_xlabel('Gene {}'.format(gene1_name))
ax.set_ylabel('Gene {}'.format(gene2_name))
plt.tight_layout()
plt.show()
/scratch/temp/10368256/ipykernel_1411758/3573628254.py:14: FutureWarning: `use_index` defaults to False but will default to True in future. Set True/False directly to control this behavior and silence this warning
  w = Queen.from_dataframe(gene_uTAR_merged.obsm["gpd"])
No description has been provided for this image
In [58]:
def plot_choropleth(gdf, 
                    attribute_1,
                    attribute_2,
                    alpha=0.5,
                    scheme='Quantiles', 
                    cmap='YlGnBu', 
                    legend=True):
    
    fig, axs = plt.subplots(2,1, figsize=(5, 8),
                            subplot_kw={'adjustable':'datalim'})
    
    # Choropleth for attribute_1
    gdf.plot(column=attribute_1, scheme=scheme, cmap=cmap,
             legend=legend, legend_kwds={'loc': 'upper left',
                                         'bbox_to_anchor': (0.92, 0.8)},
             ax=axs[0], alpha=alpha, markersize=2)
    
    axs[0].set_title('Choropleth plot for {}'.format(attribute_1), y=0.8)
    axs[0].set_axis_off()
    
    # Choropleth for attribute_2
    gdf.plot(column=attribute_2, scheme=scheme, cmap=cmap,
             legend=legend, legend_kwds={'loc': 'upper left',
                                         'bbox_to_anchor': (0.92, 0.8)},
             ax=axs[1], alpha=alpha, markersize=2)
    
    axs[1].set_title('Choropleth plot for {}'.format(attribute_2), y=0.8)
    axs[1].set_axis_off()
    
    plt.tight_layout()
    return fig, axs

# Use the modified function without the img argument
plot_choropleth(gene_uTAR_merged.obsm["gpd"], 
                gene1_name,
                gene2_name)
plt.show()
/home/s4716765/.local/lib/python3.8/site-packages/mapclassify/classifiers.py:257: UserWarning: Not enough unique values in array to form 5 classes. Setting k to 2.
  warnings.warn(
/home/s4716765/.local/lib/python3.8/site-packages/mapclassify/classifiers.py:257: UserWarning: Not enough unique values in array to form 5 classes. Setting k to 2.
  warnings.warn(
No description has been provided for this image
In [63]:
fig, axs = plt.subplots(1, 1, figsize=(5, 8), 
                        subplot_kw={'adjustable':'datalim'})

# Main spatial correlation plot 
lisa_cluster(moran_loc_bv, 
             gene_uTAR_merged.obsm["gpd"], 
             p=0.05,
             figsize=(9, 9),
             markersize=0.3,
             **{"alpha": 1},
             ax=axs,
             legend_kwds={'loc': 'upper left',
                          'bbox_to_anchor': (0.92, 0.8)})
             
axs.set_title(gene1_name + ' vs ' + gene2_name, y=0.75)
axs.set_axis_off()
plt.show()
No description has been provided for this image
In [3]:
 
In [64]:
## CALC MORAN'S INDEX
epr_df = gene_uTAR_merged.to_df()


#gene1_name="IRAK3"#"MSRB3"#"PFN1"#"ATOX1"
#"VHL"#"PTEN"#"TMSB10"#"ITM2B" #"PKM" #"ANXA2"  #CCND1:chr19_9687799_9691349_-_3392_0
#gene2_name="12_66056899_66058349_+_736630_0"#"X_44794599_44795299_+_146087_0"#"18_63063049_63063849_-_11504_0"#"5_18162949_18163149_+_4437_0"
gene1_name="BIRC5"#""##"HNRNPD"#"FUS"#"ELAVL1"#"TIA1"#"EWSR1"#"TNRC6A"#"IGF2BP2"#"AGO4"#"AGO3"#"PTBP1"#"TAF15"#"IGF2BP1"#"SRSF1"#"IGF2BP3"#"FUS"#"TARDBP"#"AGO2" "AGO1"#"TAF15"#"MOV10"
gene2_name="chr10_37391399_37422749_+_39822_0"#"12_66057549_66058349_+_212175_0"#"1_91048949_91049999_+_26510_0"#""#"12_66056899_66058349_+_736630_0"#"5_18162949_18163149_+_4437_0"

gene_uTAR_merged.obsm["gpd"][gene1_name] = epr_df[gene1_name]
#y = pd.DataFrame(y[:,:1])
gene_uTAR_merged.obsm["gpd"][gene2_name] = epr_df[gene2_name]
w = Queen.from_dataframe(gene_uTAR_merged.obsm["gpd"])
x=np.array(epr_df[gene1_name]).astype(int) #need to set type to int or float
y=np.array(epr_df[gene2_name]).astype(int)

#calculate moran index
moran = Moran(y,w)
moran_loc_bv = Moran_Local_BV(y, x, w)

fig, ax = plt.subplots(figsize=(4,18))
moran_scatterplot(moran_loc_bv, p=0.05, ax=ax)
ax.set_xlabel('Gene {}'.format(gene1_name))
ax.set_ylabel('Gene {}'.format(gene2_name))
plt.tight_layout()
plt.show()


def plot_choropleth(gdf, 
                    attribute_1,
                    attribute_2,
                    alpha=0.5,
                    scheme='Quantiles', 
                    cmap='YlGnBu', 
                    legend=True):
    
    fig, axs = plt.subplots(2,1, figsize=(5, 8),
                            subplot_kw={'adjustable':'datalim'})
        # Choropleth for attribute_1
    gdf.plot(column=attribute_1, scheme=scheme, cmap=cmap,
             legend=legend, legend_kwds={'loc': 'upper left',
                                         'bbox_to_anchor': (0.92, 0.8)},
             ax=axs[0], alpha=alpha, markersize=2)
    
    axs[0].set_title('Choropleth plot for {}'.format(attribute_1), y=0.8)
    axs[0].set_axis_off()
        # Choropleth for attribute_2
    gdf.plot(column=attribute_2, scheme=scheme, cmap=cmap,
             legend=legend, legend_kwds={'loc': 'upper left',
                                         'bbox_to_anchor': (0.92, 0.8)},
             ax=axs[1], alpha=alpha, markersize=2)
    
    axs[1].set_title('Choropleth plot for {}'.format(attribute_2), y=0.8)
    axs[1].set_axis_off()
    
    plt.tight_layout()
    return fig, axs
# Use the modified function without the img argument
plot_choropleth(gene_uTAR_merged.obsm["gpd"], 
                gene1_name,
                gene2_name)
plt.show()


# Main spatial correlation plot 
lisa_cluster(moran_loc_bv, 
             gene_uTAR_merged.obsm["gpd"], 
             p=0.05,
             figsize=(9, 9),
             markersize=0.15,
             **{"alpha": 1},
             ax=axs,
             legend_kwds={'loc': 'upper left',
                          'bbox_to_anchor': (0.92, 0.8)})
             
axs.set_title(gene1_name + ' vs ' + gene2_name, y=0.75)
axs.set_axis_off()
plt.show()
/scratch/temp/10858931/ipykernel_3186245/3755851813.py:14: FutureWarning: `use_index` defaults to False but will default to True in future. Set True/False directly to control this behavior and silence this warning
  w = Queen.from_dataframe(gene_uTAR_merged.obsm["gpd"])
No description has been provided for this image
/home/s4716765/.local/lib/python3.8/site-packages/mapclassify/classifiers.py:257: UserWarning: Not enough unique values in array to form 5 classes. Setting k to 2.
  warnings.warn(
/home/s4716765/.local/lib/python3.8/site-packages/mapclassify/classifiers.py:257: UserWarning: Not enough unique values in array to form 5 classes. Setting k to 2.
  warnings.warn(
No description has been provided for this image
No description has been provided for this image
<Figure size 640x480 with 0 Axes>
In [12]:
## CALC MORAN'S INDEX
epr_df = gene_uTAR_merged.to_df()


#gene1_name="IRAK3"#"MSRB3"#"PFN1"#"ATOX1"
#"VHL"#"PTEN"#"TMSB10"#"ITM2B" #"PKM" #"ANXA2"  #CCND1:chr19_9687799_9691349_-_3392_0
#gene2_name="12_66056899_66058349_+_736630_0"#"X_44794599_44795299_+_146087_0"#"18_63063049_63063849_-_11504_0"#"5_18162949_18163149_+_4437_0"
gene1_name="BIRC5"#""##"HNRNPD"#"FUS"#"ELAVL1"#"TIA1"#"EWSR1"#"TNRC6A"#"IGF2BP2"#"AGO4"#"AGO3"#"PTBP1"#"TAF15"#"IGF2BP1"#"SRSF1"#"IGF2BP3"#"FUS"#"TARDBP"#"AGO2" "AGO1"#"TAF15"#"MOV10"
gene2_name="chr5_11931649_11933299_+_783_0"#"12_66057549_66058349_+_212175_0"#"1_91048949_91049999_+_26510_0"#""#"12_66056899_66058349_+_736630_0"#"5_18162949_18163149_+_4437_0"

gene_uTAR_merged.obsm["gpd"][gene1_name] = epr_df[gene1_name]
#y = pd.DataFrame(y[:,:1])
gene_uTAR_merged.obsm["gpd"][gene2_name] = epr_df[gene2_name]
w = Queen.from_dataframe(gene_uTAR_merged.obsm["gpd"])
x=np.array(epr_df[gene1_name]).astype(int) #need to set type to int or float
y=np.array(epr_df[gene2_name]).astype(int)

#calculate moran index
moran = Moran(y,w)
moran_loc_bv = Moran_Local_BV(y, x, w)

fig, ax = plt.subplots(figsize=(4,18))
moran_scatterplot(moran_loc_bv, p=0.05, ax=ax)
ax.set_xlabel('Gene {}'.format(gene1_name))
ax.set_ylabel('Gene {}'.format(gene2_name))
plt.tight_layout()
plt.show()

fig, axs = plt.subplots(1, 1, figsize=(5, 8), 
                        subplot_kw={'adjustable':'datalim'})

###
def plot_choropleth(gdf, 
                    attribute_1,
                    attribute_2,
                    alpha=0.5,
                    scheme='Quantiles', 
                    cmap='YlGnBu', 
                    legend=True):
    
    fig, axs = plt.subplots(2,1, figsize=(5, 8),
                            subplot_kw={'adjustable':'datalim'})
        # Choropleth for attribute_1
    gdf.plot(column=attribute_1, scheme=scheme, cmap=cmap,
             legend=legend, legend_kwds={'loc': 'upper left',
                                         'bbox_to_anchor': (0.92, 0.8)},
             ax=axs[0], alpha=alpha, markersize=2)
    
    axs[0].set_title('Choropleth plot for {}'.format(attribute_1), y=0.8)
    axs[0].set_axis_off()
        # Choropleth for attribute_2
    gdf.plot(column=attribute_2, scheme=scheme, cmap=cmap,
             legend=legend, legend_kwds={'loc': 'upper left',
                                         'bbox_to_anchor': (0.92, 0.8)},
             ax=axs[1], alpha=alpha, markersize=2)
    
    axs[1].set_title('Choropleth plot for {}'.format(attribute_2), y=0.8)
    axs[1].set_axis_off()
    
    plt.tight_layout()
    return fig#, axs
# Use the modified function without the img argument
plot_choropleth(gene_uTAR_merged.obsm["gpd"], 
                gene1_name,
                gene2_name)
plt.show()
###

# Main spatial correlation plot 
lisa_cluster(moran_loc_bv, 
             gene_uTAR_merged.obsm["gpd"], 
             p=0.05,
             figsize=(9, 9),
             markersize=0.3,
             **{"alpha": 1},
             ax=axs,
             legend_kwds={'loc': 'upper left',
                          'bbox_to_anchor': (0.92, 0.8)})
             
axs.set_title(gene1_name + ' vs ' + gene2_name, y=0.75)
axs.set_axis_off()
plt.show()
/scratch/temp/10858931/ipykernel_3186245/16448487.py:14: FutureWarning: `use_index` defaults to False but will default to True in future. Set True/False directly to control this behavior and silence this warning
  w = Queen.from_dataframe(gene_uTAR_merged.obsm["gpd"])
No description has been provided for this image
/home/s4716765/.local/lib/python3.8/site-packages/mapclassify/classifiers.py:257: UserWarning: Not enough unique values in array to form 5 classes. Setting k to 2.
  warnings.warn(
/home/s4716765/.local/lib/python3.8/site-packages/mapclassify/classifiers.py:257: UserWarning: Not enough unique values in array to form 5 classes. Setting k to 2.
  warnings.warn(
No description has been provided for this image
No description has been provided for this image
<Figure size 640x480 with 0 Axes>
In [35]:
## CALC MORAN'S INDEX
epr_df = gene_uTAR_merged.to_df()


#gene1_name="IRAK3"#"MSRB3"#"PFN1"#"ATOX1"
#"VHL"#"PTEN"#"TMSB10"#"ITM2B" #"PKM" #"ANXA2"  #CCND1:chr19_9687799_9691349_-_3392_0
#gene2_name="12_66056899_66058349_+_736630_0"#"X_44794599_44795299_+_146087_0"#"18_63063049_63063849_-_11504_0"#"5_18162949_18163149_+_4437_0"
gene1_name="TARDBP"#""##"HNRNPD"#"FUS"#"ELAVL1"#"TIA1"#"EWSR1"#"TNRC6A"#"IGF2BP2"#"AGO4"#"AGO3"#"PTBP1"#"TAF15"#"IGF2BP1"#"SRSF1"#"IGF2BP3"#"FUS"#"TARDBP"#"AGO2" "AGO1"#"TAF15"#"MOV10"
gene2_name="chr13_22998649_23006399_-_10358_0"#"12_66057549_66058349_+_212175_0"#"1_91048949_91049999_+_26510_0"#""#"12_66056899_66058349_+_736630_0"#"5_18162949_18163149_+_4437_0"

gene_uTAR_merged.obsm["gpd"][gene1_name] = epr_df[gene1_name]
#y = pd.DataFrame(y[:,:1])
gene_uTAR_merged.obsm["gpd"][gene2_name] = epr_df[gene2_name]
w = Queen.from_dataframe(gene_uTAR_merged.obsm["gpd"])
x=np.array(epr_df[gene1_name]).astype(int) #need to set type to int or float
y=np.array(epr_df[gene2_name]).astype(int)

#calculate moran index
moran = Moran(y,w)
moran_loc_bv = Moran_Local_BV(y, x, w)

fig, ax = plt.subplots(figsize=(4,18))
moran_scatterplot(moran_loc_bv, p=0.05, ax=ax)
ax.set_xlabel('Gene {}'.format(gene1_name))
ax.set_ylabel('Gene {}'.format(gene2_name))
plt.tight_layout()
plt.show()

fig, axs = plt.subplots(1, 1, figsize=(5, 8), 
                        subplot_kw={'adjustable':'datalim'})

# Main spatial correlation plot 
lisa_cluster(moran_loc_bv, 
             gene_uTAR_merged.obsm["gpd"], 
             p=0.05,
             figsize=(9, 9),
             markersize=0.3,
             **{"alpha": 1},
             ax=axs,
             legend_kwds={'loc': 'upper left',
                          'bbox_to_anchor': (0.92, 0.8)})
             
axs.set_title(gene1_name + ' vs ' + gene2_name, y=0.75)
axs.set_axis_off()
plt.show()
/scratch/temp/10490146/ipykernel_1489797/524671805.py:14: FutureWarning: `use_index` defaults to False but will default to True in future. Set True/False directly to control this behavior and silence this warning
  w = Queen.from_dataframe(gene_uTAR_merged.obsm["gpd"])
No description has been provided for this image
No description has been provided for this image
In [34]:
gene_uTAR_mergedn = gene_uTAR_merged
In [7]:
## CALC MORAN'S INDEX
epr_df = gene_uTAR_merged.to_df()


gene1_name="ID3"#""##"HNRNPD"#"FUS"#"ELAVL1"#"TIA1"#"EWSR1"#"TNRC6A"#"IGF2BP2"#"AGO4"#"AGO3"#"PTBP1"#"TAF15"#"IGF2BP1"#"SRSF1"#"IGF2BP3"#"FUS"#"TARDBP"#"AGO2" "AGO1"#"TAF15"#"MOV10"
gene2_name="chr17_49855249_49858099_+_262_0"

gene_uTAR_merged.obsm["gpd"][gene1_name] = epr_df[gene1_name]
#y = pd.DataFrame(y[:,:1])
gene_uTAR_merged.obsm["gpd"][gene2_name] = epr_df[gene2_name]
w = Queen.from_dataframe(gene_uTAR_merged.obsm["gpd"])
x=np.array(epr_df[gene1_name]).astype(int) #need to set type to int or float
y=np.array(epr_df[gene2_name]).astype(int)

#calculate moran index
moran = Moran(y,w)
moran_loc_bv = Moran_Local_BV(y, x, w)

fig, ax = plt.subplots(figsize=(4,18))
moran_scatterplot(moran_loc_bv, p=0.05, ax=ax)
ax.set_xlabel('Gene {}'.format(gene1_name))
ax.set_ylabel('Gene {}'.format(gene2_name))
plt.tight_layout()
plt.show()

#
def plot_choropleth(gdf, 
                    attribute_1,
                    attribute_2,
                    alpha=0.5,
                    scheme='Quantiles', 
                    cmap='YlGnBu', 
                    legend=True):
    
    fig, axs = plt.subplots(2,1, figsize=(5, 8),
                            subplot_kw={'adjustable':'datalim'})
        # Choropleth for attribute_1
    gdf.plot(column=attribute_1, scheme=scheme, cmap=cmap,
             legend=legend, legend_kwds={'loc': 'upper left',
                                         'bbox_to_anchor': (0.92, 0.8)},
             ax=axs[0], alpha=alpha, markersize=2)
    
    axs[0].set_title('Choropleth plot for {}'.format(attribute_1), y=0.8)
    axs[0].set_axis_off()
        # Choropleth for attribute_2
    gdf.plot(column=attribute_2, scheme=scheme, cmap=cmap,
             legend=legend, legend_kwds={'loc': 'upper left',
                                         'bbox_to_anchor': (0.92, 0.8)},
             ax=axs[1], alpha=alpha, markersize=2)
    
    axs[1].set_title('Choropleth plot for {}'.format(attribute_2), y=0.8)
    axs[1].set_axis_off()
    
    plt.tight_layout()
    return fig#, axs
# Use the modified function without the img argument
plot_choropleth(gene_uTAR_merged.obsm["gpd"], 
                gene1_name,
                gene2_name)
plt.show()
###

# Main spatial correlation plot 
lisa_cluster(moran_loc_bv, 
             gene_uTAR_merged.obsm["gpd"], 
             p=0.05,
             figsize=(9, 9),
             markersize=0.3,
             **{"alpha": 1},
             ax=axs,
             legend_kwds={'loc': 'upper left',
                          'bbox_to_anchor': (0.92, 0.8)})
             
axs.set_title(gene1_name + ' vs ' + gene2_name, y=0.75)
axs.set_axis_off()
plt.show()
/scratch/temp/10916475/ipykernel_1753339/139539757.py:11: FutureWarning: `use_index` defaults to False but will default to True in future. Set True/False directly to control this behavior and silence this warning
  w = Queen.from_dataframe(gene_uTAR_merged.obsm["gpd"])
No description has been provided for this image
/home/s4716765/.local/lib/python3.8/site-packages/mapclassify/classifiers.py:257: UserWarning: Not enough unique values in array to form 5 classes. Setting k to 2.
  warnings.warn(
/home/s4716765/.local/lib/python3.8/site-packages/mapclassify/classifiers.py:257: UserWarning: Not enough unique values in array to form 5 classes. Setting k to 2.
  warnings.warn(
No description has been provided for this image
No description has been provided for this image
<Figure size 640x480 with 0 Axes>
In [ ]:
 
In [9]:
curio_genes_mel
Out[9]:
AnnData object with n_obs × n_vars = 297240 × 28803
    obsm: 'X_spatial', 'spatial'
In [ ]:
count_df = pd.DataFrame(curio_genes_mel.X.toarray().T,  # Transpose the matrix to match the desired shape
                        index=curio_genes_mel.var.index,   # Genes as rows
                        columns=curio_genes_mel.obs.index)  # Cells as columns

# Write the DataFrame to a tab-separated file
count_df.to_csv("curio_gene_matrix.txt", sep="\t", index=True, header=True)
In [8]:
# Read the count matrix text file into a DataFrame
count_df = pd.read_csv("Xenium_uTAR_mat.txt", sep="\t", index_col=0)

# Get the barcodes (row indices) from the AnnData object
adata_barcodes = curio_genes_mel.obs.index

# Check if the barcodes in the count matrix columns match with adata's barcodes
if list(count_df.columns) == list(adata_barcodes):
    print("The barcodes are in the same order.")
else:
    print("The barcodes are not in the same order.")
    
    # Optionally, identify mismatched barcodes
#    mismatches = [barcode for barcode, adata_barcode in zip(count_df.columns, adata_barcodes) if barcode != adata_barcode]
#    print("Mismatched barcodes:", mismatches)
The barcodes are in the same order.

Melanoma¶

In [13]:
os.chdir("/scratch/project/stseq/Prakrithi/curio/Human_melanoma_10by10_input/Human_melanoma_10by10/")

Curio gene counts¶

In [3]:
curio_genes_mel = sc.read_h5ad('/QRISdata/Q4386/curio/Human_melanoma_10by10_anndata.h5ad')
In [ ]:
sc.pl.spatial(curio_genes_mel,alpha_img=0.6, color =['PMEL', 'MLANA','DCT','MALAT1'],cmap='Spectral_r',vmax=2, spot_size=30)
No description has been provided for this image
In [5]:
sc.pl.spatial(curio_genes_mel,alpha_img=0.6, color =['VGF', 'FN1','KRT10','SH3KBP1'],cmap='Spectral_r',vmax=2, spot_size=30)
No description has been provided for this image
In [13]:
sc.pl.spatial(curio_genes_mel,alpha_img=0.6, color =['NDRG1', 'BIRC5','ELAVL1','TARDBP'],cmap='Reds',vmax=2, spot_size=100)
No description has been provided for this image
In [ ]:
curio_genes_mel
In [105]:
import anndata
import numpy as np

# Define the gene of interest
gene2 = 'chr5_11929999_11935299_+_2148_0'

# Ensure the gene exists in adata2
if gene2 in Mel_uTARs.var_names:
    # Create a boolean mask for cells where gene2 > 0 in adata2
    mask = Mel_uTARs[:, gene2].X > 0
    
    # Convert the boolean mask to a boolean array (needed for indexing)
    boolean_mask = np.array(mask).flatten()
    
    # Ensure the length of the boolean mask matches the number of cells in adata1
    if len(boolean_mask) == Mel_uTARs.shape[0]:
        # Subset adata1 based on the cells where gene2 > 0 in adata2
        utar_subset = Mel_uTARs[boolean_mask].copy()
        
        # Verify the result
        print(f"Number of cells in adata1 where {gene2} > 0 in adata2: {utar_subset.shape[0]}")
        print(utar_subset.obs.head())
    else:
        raise ValueError("The length of the boolean mask does not match the number of cells in adata2")
else:
    raise ValueError(f"The gene '{gene2}' is not in adata2.var_names")
sc.pl.spatial(utar_subset,alpha_img=0.6, color =['BIRC5'],cmap='Reds',vmax=0.5, spot_size=200)
Number of cells in adata1 where chr5_11929999_11935299_+_2148_0 > 0 in adata2: 24
                imagecol  imagerow
GTGCGCCCTCGCAA   5094.00   6978.50
TAGTAACCTGACTG   6173.40   5723.95
CGTGAAGCGCAAGC   5444.50   6452.00
CACGCCTGTAGAGG   6355.45   5497.15
TGCGGCAAGTTAGT   4926.15   5336.50
No description has been provided for this image
In [133]:
gene_subset[:, "BIRC5"].X.toarray()
Out[133]:
array([[0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.]], dtype=float32)
In [ ]:
 
In [116]:
import anndata
import numpy as np

# Define the gene of interest
gene2 = 'chr5_11929999_11935299_+_2148_0'

# Ensure the gene exists in adata2
if gene2 in Mel_uTARs.var_names:
    # Create a boolean mask for cells where gene2 > 0 in adata2
    mask = Mel_uTARs[:, gene2].X > 0
    
    # Convert the boolean mask to a boolean array (needed for indexing)
    boolean_mask = np.array(mask).flatten()
    
    # Ensure the length of the boolean mask matches the number of cells in adata1
    if len(boolean_mask) == Mel_uTARs.shape[0]:
        # Subset adata1 based on the cells where gene2 > 0 in adata2
        gene_subset = curio_genes_mel[boolean_mask].copy()
        
        # Verify the result
        print(f"Number of cells in adata1 where {gene2} > 0 in adata2: {gene_subset.shape[0]}")
        print(gene_subset.obs.head())
    else:
        raise ValueError("The length of the boolean mask does not match the number of cells in adata2")
else:
    raise ValueError(f"The gene '{gene2}' is not in adata2.var_names")
Number of cells in adata1 where chr5_11929999_11935299_+_2148_0 > 0 in adata2: 24
Empty DataFrame
Columns: []
Index: [GTGCGCCCTCGCAA, TAGTAACCTGACTG, CGTGAAGCGCAAGC, CACGCCTGTAGAGG, TGCGGCAAGTTAGT]
In [123]:
c5_pos=gene_subset.obs.index.to_list()
c5_pos
Out[123]:
['GTGCGCCCTCGCAA',
 'TAGTAACCTGACTG',
 'CGTGAAGCGCAAGC',
 'CACGCCTGTAGAGG',
 'TGCGGCAAGTTAGT',
 'CCCTCAAAGTTTGA',
 'GCGGCAGTTGACCG',
 'TAAGCGAGTGTTGG',
 'CGCTTGGGTAGCAT',
 'TGCGCTATTCTAAC',
 'TGCCTGCGGTCGTG',
 'TACACCGAGATAGC',
 'TGCTCTTCCAACAG',
 'CTGGACTTCTGCTC',
 'GTCGCGCTCGTCCG',
 'ACTATGTTAACTCG',
 'ATTATGAAGGCCTA',
 'ATAGTTGCAAGGTG',
 'CCTACGTCCTATTA',
 'AAGGATGCCCCACG',
 'AGATAAGGCGCTGT',
 'AGCTCTTATTTCCG',
 'ATCAGTAGCTATCA',
 'GATGAGTGATGCAA']
In [101]:
sc.pl.spatial(gene_subset,alpha_img=0.6, color =['BIRC5'],cmap='Reds',vmax=0.5, spot_size=200)
No description has been provided for this image

uTARs¶

In [4]:
uM=np.transpose(pd.read_csv('/QRISdata/Q4386/curio/melanoma/interested_uTAR_mat.txt', sep="\t", header=0, index_col=0)) #interested_uTAR_mat_f.txt
spatialuM=pd.read_csv('/QRISdata/Q4386/curio/melanoma/spatial_row_col_rev', sep="\t", header=0)
Mel_uTARs = st.create_stlearn(count=uM,spatial=spatialuM,library_id="M",scale=0.5)
In [5]:
sc.pl.spatial(Mel_uTARs,alpha_img=0.6, color =['chr12_111603399_111610199_-_603_0','chr9_81829699_81841349_-_802_0','chr2_81555449_81557249_+_9979_0',
                                              'chr15_80010899_80012049_+_328_0','chr2_149482749_149491499_+_1689_0',
                                              'chr5_11929999_11935299_+_2148_0','chr17_49855199_49859399_+_535_0'], spot_size=250,cmap='Reds',vmax=0.5)
#'chr9_22405949_22410899_+_165_0',
No description has been provided for this image
In [28]:
sc.pl.spatial(Mel_uTARs,alpha_img=0.6, color =['chr7_118702349_118704699_+_474624_0','chr18_13935449_13945249_-_114626_0',
                                              'chr10_31969999_31970199_+_12945_0','chr13_39795849_39798149_+_8404_0'], spot_size=150,cmap='Reds',vmax=0.5)
#'chr9_22405949_22410899_+_165_0',
No description has been provided for this image
In [10]:
sc.pl.spatial(Mel_uTARs,alpha_img=0.6, color =['chr6_6676749_6678349_-_285_0','chr15_80010899_80012049_+_328_0','chr17_49860099_49862399_+_346_0',
                                               'chr12_28791749_28795299_-_2741_0','chr12_111603399_111610199_-_603_0','chr9_81829699_81841349_-_802_0'],
              spot_size=150,cmap='Reds',vmax=0.5) #'chr9_22405949_22410899_+_165_0'
No description has been provided for this image
In [5]:
sc.pl.spatial(Mel_uTARs,alpha_img=0.6, color =['chr12_28791749_28795299_-_2741_0'],
              spot_size=150,cmap='inferno',vmax=1) #'chr9_22405949_22410899_+_165_0'
No description has been provided for this image
In [5]:
sc.pp.filter_cells(curio_genes_mel, min_genes=100)
sc.pl.spatial(curio_genes_mel,alpha_img=0.6, color =['PMEL','MLANA'],spot_size=150,cmap='inferno')
No description has been provided for this image
In [18]:
curio_genes_mel
Out[18]:
AnnData object with n_obs × n_vars = 92326 × 28803
    obs: 'n_genes'
    obsm: 'X_spatial', 'spatial'
In [19]:
uM=np.transpose(pd.read_csv('/QRISdata/Q4386/curio/melanoma/interested_uTAR_mat_f.txt', sep="\t", header=0, index_col=0)) #interested_uTAR_mat_f.txt
spatialuM=pd.read_csv('/QRISdata/Q4386/curio/melanoma/spatial_row_col_rev', sep="\t", header=0)
Mel_uTARs = st.create_stlearn(count=uM,spatial=spatialuM,library_id="M",scale=0.5)
# Find common cells between the two objects
common_cells = Mel_uTARs.obs_names.intersection(curio_genes_mel.obs_names)

# Subset Mel_uTARs to keep only those common cells
Mel_uTARs = Mel_uTARs[common_cells].copy()

#sc.pp.filter_cells(Mel_uTARs, min_genes=)
sc.pl.spatial(Mel_uTARs,alpha_img=0.6, color =['chr10_31969999_31970199_+_12945_0','chr15_80010899_80012049_+_328_0'],
              spot_size=150,cmap='inferno',vmax=1) #'chr9_22405949_22410899_+_165_0'
No description has been provided for this image
In [21]:
import matplotlib.pyplot as plt

with plt.rc_context({"figure.figsize": (8, 8), "figure.dpi": (300)}):
    sc.pl.spatial(Mel_uTARs,alpha_img=0.6, color =['chr15_80010899_80012049_+_328_0'], show=False,
              spot_size=150,cmap='inferno',vmax=1)
    plt.savefig("/scratch/project/stseq/Prakrithi/STOMICS/cuTAR94762_curio_mel.pdf", bbox_inches="tight")
No description has been provided for this image
In [8]:
import numpy as np

# Calculate the total counts for each observation (cell)
obs_counts = Mel_uTARs.X.sum(axis=1)

# Keep only observations (cells) with counts >= 50
Mel_uTARs_f = Mel_uTARs[obs_counts >= 1].copy()
In [9]:
sc.pl.spatial(Mel_uTARs_f,alpha_img=0.6, color =['chr12_111603399_111610199_-_603_0','chr9_81829699_81841349_-_802_0','chr2_81555449_81557249_+_9979_0',
                                              'chr15_80010899_80012049_+_328_0','chr2_149482749_149491499_+_1689_0',
                                              'chr5_11929999_11935299_+_2148_0','chr17_49855199_49859399_+_535_0'], spot_size=250,cmap='Reds',vmax=1)
#'chr9_22405949_22410899_+_165_0',
No description has been provided for this image
In [16]:
curio_genes_mel[:, 'PMEL'].X
Out[16]:
<297240x1 sparse matrix of type '<class 'numpy.float32'>'
	with 91951 stored elements in Compressed Sparse Row format>
In [19]:
import numpy as np
import pandas as pd

# 1. Extract BIRC5 counts from the first AnnData (adata1)
if 'PMEL' in curio_genes_mel.var_names:
    birc5_counts = curio_genes_mel[:, 'PMEL'].X
    birc5_df = pd.DataFrame(birc5_counts.toarray(), index=curio_genes_mel.obs.index, columns=['PMEL'])
else:
    print("PMEL is not found in adata1.")

# 2. Ensure that only the matching obs (cells) between adata1 and adata2 are selected
# We perform an inner join on the index to match the cells in both AnnData objects.
common_obs = Mel_uTARs.obs.index.intersection(birc5_df.index)

# 3. Filter the BIRC5 data to only the matching observations
birc5_matching = birc5_df.loc[common_obs]

# 4. Add BIRC5 counts as a new column to the adata2.obs dataframe
Mel_uTARs.obs['PMEL'] = np.nan  # Initialize the column with NaNs
Mel_uTARs.obs.loc[common_obs, 'PMEL'] = birc5_matching['PMEL'].values  # Add BIRC5 counts for matching cells

# 5. Verify the results
print(Mel_uTARs.obs[['PMEL']].dropna())

B=Mel_uTARs.obs[['PMEL']].dropna()
B=B.T
B = B.astype(int)
print(B)
B.to_csv("PMEL_counts.txt", sep="\t")
                PMEL
ATGTATACGGAAGG   0.0
CCTCGGCAAACCGC   0.0
CGGCCATACGAATA   0.0
TGTGTCCTGATGAC   0.0
GCCCCGAGTATCCG   4.0
...              ...
CATGCAGAGTCGAG   1.0
TTCTTAATAACGAT   0.0
CTCCAGTATCGATA   0.0
GGCTCTGAAGTATA   0.0
GCTGATGAGAATGT   0.0

[297240 rows x 1 columns]
      ATGTATACGGAAGG  CCTCGGCAAACCGC  CGGCCATACGAATA  TGTGTCCTGATGAC  \
PMEL               0               0               0               0   

      GCCCCGAGTATCCG  GTGCGGATGGGTTC  CTCCTGGAGTGGCA  CAGTATGGCTGGCT  \
PMEL               4               1               2               2   

      TGGCGGCTGAACCT  GCAGCGGCGCCTTT  ...  ACATACACGAACTG  TCCCATAAGTAACT  \
PMEL               3               0  ...               0               0   

      AGAACAACAGACCA  CAGGAAAGTAAGTG  CACATAAACAAAAT  CATGCAGAGTCGAG  \
PMEL               0               0               0               1   

      TTCTTAATAACGAT  CTCCAGTATCGATA  GGCTCTGAAGTATA  GCTGATGAGAATGT  
PMEL               0               0               0               0  

[1 rows x 297240 columns]
In [18]:
birc5_matching
Out[18]:
BIRC5
ATGTATACGGAAGG 0.0
CCTCGGCAAACCGC 0.0
CGGCCATACGAATA 0.0
TGTGTCCTGATGAC 0.0
GCCCCGAGTATCCG 4.0
... ...
CATGCAGAGTCGAG 1.0
TTCTTAATAACGAT 0.0
CTCCAGTATCGATA 0.0
GGCTCTGAAGTATA 0.0
GCTGATGAGAATGT 0.0

297240 rows × 1 columns

In [29]:
Mel_uTARs
Out[29]:
AnnData object with n_obs × n_vars = 297240 × 13
    obs: 'imagecol', 'imagerow'
    uns: 'spatial'
    obsm: 'spatial'
In [125]:
import numpy as np
import pandas as pd

# 1. Extract BIRC5 counts from the first AnnData (adata1)
if 'BIRC5' in curio_genes_mel.var_names:
    birc5_counts = curio_genes_mel[:, 'BIRC5'].X
    birc5_df = pd.DataFrame(birc5_counts.toarray(), index=curio_genes_mel.obs.index, columns=['BIRC5'])
else:
    print("BIRC5 is not found in adata1.")

# 2. Ensure that only the matching obs (cells) between adata1 and adata2 are selected
# We perform an inner join on the index to match the cells in both AnnData objects.
common_obs = Mel_uTARs.obs.index.intersection(birc5_df.index)

# 3. Filter the BIRC5 data to only the matching observations
birc5_matching = birc5_df.loc[common_obs]

# 4. Add BIRC5 counts as a new column to the adata2.obs dataframe
Mel_uTARs.obs['BIRC5'] = np.nan  # Initialize the column with NaNs
Mel_uTARs.obs.loc[common_obs, 'BIRC5'] = birc5_matching['BIRC5'].values  # Add BIRC5 counts for matching cells

# 5. Verify the results
print(Mel_uTARs.obs[['BIRC5']].dropna())
                BIRC5
ATGTATACGGAAGG    0.0
CCTCGGCAAACCGC    0.0
CGGCCATACGAATA    0.0
TGTGTCCTGATGAC    0.0
GCCCCGAGTATCCG    2.0
...               ...
CATGCAGAGTCGAG    0.0
TTCTTAATAACGAT    0.0
CTCCAGTATCGATA    0.0
GGCTCTGAAGTATA    0.0
GCTGATGAGAATGT    0.0

[297240 rows x 1 columns]
In [42]:
B=Mel_uTARs.obs[['BIRC5']].dropna()
B=B.T
B = B.astype(int)
B
Out[42]:
ATGTATACGGAAGG CCTCGGCAAACCGC CGGCCATACGAATA TGTGTCCTGATGAC GCCCCGAGTATCCG GTGCGGATGGGTTC CTCCTGGAGTGGCA CAGTATGGCTGGCT TGGCGGCTGAACCT GCAGCGGCGCCTTT ... ACATACACGAACTG TCCCATAAGTAACT AGAACAACAGACCA CAGGAAAGTAAGTG CACATAAACAAAAT CATGCAGAGTCGAG TTCTTAATAACGAT CTCCAGTATCGATA GGCTCTGAAGTATA GCTGATGAGAATGT
BIRC5 0 0 0 0 2 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0

1 rows × 297240 columns

In [ ]:
 
In [128]:
B=Mel_uTARs.obs[['BIRC5']].dropna()
B=B.loc[c5_pos,:]
B=B.T
B = B.astype(int)
B
Out[128]:
GTGCGCCCTCGCAA TAGTAACCTGACTG CGTGAAGCGCAAGC CACGCCTGTAGAGG TGCGGCAAGTTAGT CCCTCAAAGTTTGA GCGGCAGTTGACCG TAAGCGAGTGTTGG CGCTTGGGTAGCAT TGCGCTATTCTAAC ... GTCGCGCTCGTCCG ACTATGTTAACTCG ATTATGAAGGCCTA ATAGTTGCAAGGTG CCTACGTCCTATTA AAGGATGCCCCACG AGATAAGGCGCTGT AGCTCTTATTTCCG ATCAGTAGCTATCA GATGAGTGATGCAA
BIRC5 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0

1 rows × 24 columns

In [121]:
c5_pos
Out[121]:
Index(['GTGCGCCCTCGCAA', 'TAGTAACCTGACTG', 'CGTGAAGCGCAAGC', 'CACGCCTGTAGAGG',
       'TGCGGCAAGTTAGT', 'CCCTCAAAGTTTGA', 'GCGGCAGTTGACCG', 'TAAGCGAGTGTTGG',
       'CGCTTGGGTAGCAT', 'TGCGCTATTCTAAC', 'TGCCTGCGGTCGTG', 'TACACCGAGATAGC',
       'TGCTCTTCCAACAG', 'CTGGACTTCTGCTC', 'GTCGCGCTCGTCCG', 'ACTATGTTAACTCG',
       'ATTATGAAGGCCTA', 'ATAGTTGCAAGGTG', 'CCTACGTCCTATTA', 'AAGGATGCCCCACG',
       'AGATAAGGCGCTGT', 'AGCTCTTATTTCCG', 'ATCAGTAGCTATCA', 'GATGAGTGATGCAA'],
      dtype='object')
In [ ]:
 
In [43]:
B.to_csv("BIRC5_counts.txt", sep="\t")
In [66]:
#gene_uTAR_merged.obsm["gpd"]
In [11]:
sc.pl.spatial(Mel_uTARs,alpha_img=0.6, color =['BIRC5','chr5_11929999_11935299_+_2148_0','chr10_37409199_37411549_+_142_0'], spot_size=250,cmap='Reds',vmax=0.5)
No description has been provided for this image

Morans Mel¶

In [79]:
#spatial.iloc[1:5,:]
#spatial=spatial.set_index('BC')
spatialm=pd.read_csv('spatial_row_col', sep="\t", header=0)
spatialm.index =Mel_uTARs.obs.index
#data.obs=spatial.iloc[:,[0,1,2,3,4]]


from typing import Literal

library_id='M'
#data.obs.iloc[1:5,:]
_QUALITY = Literal["fulres", "hires", "lowres"]
quality: _QUALITY = "hires"
scale = Mel_uTARs.uns["spatial"][library_id]["scalefactors"][
            "tissue_" + quality + "_scalef"]

image_coor = Mel_uTARs.obsm["spatial"] * scale
spatialm["imagecol"] = image_coor[:, 1]*0.208
spatialm["imagerow"] = image_coor[:, 0]*0.208
#s4 0.208 s6 0.2025
 
Mel_uTARs.obsm["gpd"] = gpd.GeoDataFrame(spatialm.iloc[:,:],geometry=gpd.points_from_xy(y=spatialm.imagerow,x=spatialm.imagecol))
In [80]:
# Calculate the total counts for each observation (cell)
obs_counts = Mel_uTARs.X.sum(axis=1)

# Keep only observations (cells) with counts >= 50
Mel_uTARs_f = Mel_uTARs[obs_counts >= 1].copy()
In [81]:
from shapely import wkt
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import matplotlib.pyplot as plt
from esda.moran import Moran, Moran_Local
from splot.esda import moran_scatterplot, lisa_cluster
from libpysal.weights import Queen
import numpy as np

# Example: If you have coordinates in 'longitude' and 'latitude' columns
df = Mel_uTARs_f.obsm["gpd"]
# Convert the geometry column to shapely geometries
# Assuming df already contains shapely geometries in 'geometry' column
df['geometry'] = df['geometry'].apply(lambda x: Point(x) if not isinstance(x, Point) else x)

# Create a GeoDataFrame
geo_df = gpd.GeoDataFrame(df, geometry='geometry')
In [134]:
## CALC MORAN'S INDEX
epr_df = Mel_uTARs_f.to_df()

gene1_name="BIRC5"
gene2_name="chr5_11929999_11935299_+_2148_0"

Mel_uTARs_f.obsm["gpd"][gene1_name] = epr_df[gene1_name]
#y = pd.DataFrame(y[:,:1])
Mel_uTARs_f.obsm["gpd"][gene2_name] = epr_df[gene2_name]
#w = Queen.from_dataframe(Mel_uTARs_f.obsm["gpd"])
w = Queen.from_dataframe(geo_df)
x=np.array(epr_df[gene1_name]).astype(int) #need to set type to int or float
y=np.array(epr_df[gene2_name]).astype(int)

#calculate moran index
moran = Moran(y,w)
moran_loc_bv = Moran_Local_BV(y, x, w)

fig, ax = plt.subplots(figsize=(4,18))
moran_scatterplot(moran_loc_bv, p=0.05, ax=ax)
ax.set_xlabel('Gene {}'.format(gene1_name))
ax.set_ylabel('Gene {}'.format(gene2_name))
plt.tight_layout()
plt.show()

# Convert the DataFrame to a GeoDataFrame using the existing 'geometry' column
Mel_uTARs_f_gdf = gpd.GeoDataFrame(Mel_uTARs_f.obsm["gpd"], geometry='geometry')


fig, axs = plt.subplots(1, 1, figsize=(5, 8), subplot_kw={'adjustable':'datalim'})
lisa_cluster(moran_loc_bv, 
             Mel_uTARs_f_gdf, 
             p=0.05,
             figsize=(9, 9),
             markersize=0.05,
             **{"alpha": 1},
             ax=axs,
             legend_kwds={'loc': 'upper left',
                          'bbox_to_anchor': (0.92, 0.8)})

axs.set_title(gene1_name + ' vs ' + gene2_name, y=0.75)
axs.set_axis_off()
plt.show()
/scratch/temp/10897584/ipykernel_3931819/859448017.py:11: FutureWarning: `use_index` defaults to False but will default to True in future. Set True/False directly to control this behavior and silence this warning
  w = Queen.from_dataframe(geo_df)
No description has been provided for this image
No description has been provided for this image
In [135]:
moran_loc_bv_q_df = pd.DataFrame(moran_loc_bv.q, columns=['moran_local_q'], index=Mel_uTARs_f.obs.index)
print(moran_loc_bv_q_df['moran_local_q'].value_counts())
Mel_uTARs_f.obs['moran_local_q']=moran_loc_bv_q_df['moran_local_q']
Mel_uTARs_f.obs['Coexpressed_spots'] = (Mel_uTARs_f.obs['moran_local_q'] == 1).astype(int)
Mel_uTARs_f.obs['Coexpressed_spots'] = Mel_uTARs_f.obs['moran_local_q'].apply(lambda x: 1 if x == 1 else 0)
Mel_uTARs_f.obs['Coexpressed_spots'].value_counts()
moran_local_q
3    6057
2    4238
1      12
4      12
Name: count, dtype: int64
Out[135]:
Coexpressed_spots
0    10307
1       12
Name: count, dtype: int64
In [99]:
sc.pl.spatial(
    Mel_uTARs_f,
    color='Coexpressed_spots',  # Column with pre-defined colors
    size=5,  cmap='Reds' , vmax=1                 # Size of spots
)
No description has been provided for this image
In [103]:
import numpy as np
import pandas as pd
import anndata

# Example AnnData object (adata) setup for illustration
# adata = anndata.AnnData(X, obs=obs_df, var=var_df)

# Assuming gene1 and gene2 are the names of the genes you are interested in
gene1 = 'BIRC5'
gene2 = 'chr5_11929999_11935299_+_2148_0'#'chr5_11929999_11935299_+_2148_0'

# Ensure that 'gene1' and 'gene2' are present in adata.var
if gene1 in Mel_uTARs_f.var_names and gene2 in Mel_uTARs_f.var_names:
    # Create the new column 'coexpressed' in adata.obs
    Mel_uTARs_f.obs['coexpressed'] = np.where(
        (Mel_uTARs_f[:, gene1].X > 0) & (Mel_uTARs_f[:, gene2].X > 0), 1, 0
    )
else:
    raise ValueError(f"One or both genes '{gene1}' and '{gene2}' are not in adata.var_names")

# Verify the result
Mel_uTARs_f.obs['coexpressed'].value_counts()
Out[103]:
coexpressed
0    10319
Name: count, dtype: int64
In [77]:
moran_loc_bv._Moran_Local_BV__quads
Out[77]:
<bound method Moran_Local_BV.__quads of <esda.moran.Moran_Local_BV object at 0x7f7bb1736d90>>
In [36]:
# List all attributes and methods of the Moran_Local_BV object
print(dir(moran_loc_bv))
['EI_sim', 'Is', 'VI_sim', '_Moran_Local_BV__calc', '_Moran_Local_BV__quads', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_statistic', 'by_col', 'den', 'geoda_quads', 'n', 'n_1', 'p_sim', 'p_z_sim', 'permutations', 'q', 'quads', 'rlisas', 'seI_sim', 'sim', 'w', 'x', 'y', 'z_sim', 'zx', 'zy']
In [ ]:
#HH_points = Mel_uTARs_f_gdf[moran_loc_bv.q == 'HH']
#HH_points

#print(dir(moran_loc_bv))
# Import necessary packages
import numpy as np
import pandas as pd

# Loop over attributes and print the first few rows if it's a valid type
for attr in dir(moran_loc_bv):
    # Filter out special or callable attributes (like methods)
    if not attr.startswith('_') and not callable(getattr(moran_loc_bv, attr)):
        value = getattr(moran_loc_bv, attr)
        
        # Print attribute name
        print(f"Attribute: {attr}")
        
        # Handle different data types and print first few rows
        if isinstance(value, (pd.DataFrame, pd.Series)):  # For pandas DataFrame or Series
            print(value.head(), "\n")
        elif isinstance(value, (np.ndarray, list)):  # For numpy arrays or lists
            print(np.array(value)[:5], "\n")  # Print first 5 elements
        else:
            print(value, "\n")  # Print the value as it is (if it's not a complex type)
In [32]:
fig, ax = plt.subplots(figsize=(4,20))
moran_scatterplot(moran_loc_bv, p=0.05, ax=ax)
ax.set_xlabel('Gene {}'.format(gene1_name))
ax.set_ylabel('Gene {}'.format(gene2_name))
plt.tight_layout()
plt.show()
No description has been provided for this image
In [38]:
moran_loc_bv.q
Out[38]:
array([3, 3, 3, ..., 3, 3, 3])
In [84]:
import pandas as pd

# Assuming moran_loc_bv.q is a NumPy array or list of the same length as adata.obs
# Convert moran_loc_bv.q to a DataFrame
moran_loc_bv_q_df = pd.DataFrame(moran_loc_bv.q, columns=['moran_local_q'], index=Mel_uTARs_f.obs.index)

# Ensure that the indices match between adata.obs and moran_loc_bv_q_df
assert all(Mel_uTARs_f.obs.index == moran_loc_bv_q_df.index), "Indices of adata.obs and moran_loc_bv_q_df do not match."

# Add the new data to adata.obs
Mel_uTARs_f.obs = Mel_uTARs_f.obs.join(moran_loc_bv_q_df)

# Verify the addition
print(Mel_uTARs_f.obs.head())
Mel_uTARs_f['moran_local_q'].value_counts()
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[84], line 11
      8 assert all(Mel_uTARs_f.obs.index == moran_loc_bv_q_df.index), "Indices of adata.obs and moran_loc_bv_q_df do not match."
     10 # Add the new data to adata.obs
---> 11 Mel_uTARs_f.obs = Mel_uTARs_f.obs.join(moran_loc_bv_q_df)
     13 # Verify the addition
     14 print(Mel_uTARs_f.obs.head())

File ~/.local/lib/python3.8/site-packages/pandas/core/frame.py:9729, in DataFrame.join(self, other, on, how, lsuffix, rsuffix, sort, validate)
   9566 def join(
   9567     self,
   9568     other: DataFrame | Series | Iterable[DataFrame | Series],
   (...)
   9574     validate: str | None = None,
   9575 ) -> DataFrame:
   9576     """
   9577     Join columns of another DataFrame.
   9578 
   (...)
   9727     5  K1  A5   B1
   9728     """
-> 9729     return self._join_compat(
   9730         other,
   9731         on=on,
   9732         how=how,
   9733         lsuffix=lsuffix,
   9734         rsuffix=rsuffix,
   9735         sort=sort,
   9736         validate=validate,
   9737     )

File ~/.local/lib/python3.8/site-packages/pandas/core/frame.py:9768, in DataFrame._join_compat(self, other, on, how, lsuffix, rsuffix, sort, validate)
   9758     if how == "cross":
   9759         return merge(
   9760             self,
   9761             other,
   (...)
   9766             validate=validate,
   9767         )
-> 9768     return merge(
   9769         self,
   9770         other,
   9771         left_on=on,
   9772         how=how,
   9773         left_index=on is None,
   9774         right_index=True,
   9775         suffixes=(lsuffix, rsuffix),
   9776         sort=sort,
   9777         validate=validate,
   9778     )
   9779 else:
   9780     if on is not None:

File ~/.local/lib/python3.8/site-packages/pandas/core/reshape/merge.py:162, in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)
    131 @Substitution("\nleft : DataFrame or named Series")
    132 @Appender(_merge_doc, indents=0)
    133 def merge(
   (...)
    146     validate: str | None = None,
    147 ) -> DataFrame:
    148     op = _MergeOperation(
    149         left,
    150         right,
   (...)
    160         validate=validate,
    161     )
--> 162     return op.get_result(copy=copy)

File ~/.local/lib/python3.8/site-packages/pandas/core/reshape/merge.py:811, in _MergeOperation.get_result(self, copy)
    807     self.left, self.right = self._indicator_pre_merge(self.left, self.right)
    809 join_index, left_indexer, right_indexer = self._get_join_info()
--> 811 result = self._reindex_and_concat(
    812     join_index, left_indexer, right_indexer, copy=copy
    813 )
    814 result = result.__finalize__(self, method=self._merge_type)
    816 if self.indicator:

File ~/.local/lib/python3.8/site-packages/pandas/core/reshape/merge.py:763, in _MergeOperation._reindex_and_concat(self, join_index, left_indexer, right_indexer, copy)
    760 left = self.left[:]
    761 right = self.right[:]
--> 763 llabels, rlabels = _items_overlap_with_suffix(
    764     self.left._info_axis, self.right._info_axis, self.suffixes
    765 )
    767 if left_indexer is not None and not is_range_indexer(left_indexer, len(left)):
    768     # Pinning the index here (and in the right code just below) is not
    769     #  necessary, but makes the `.take` more performant if we have e.g.
    770     #  a MultiIndex for left.index.
    771     lmgr = left._mgr.reindex_indexer(
    772         join_index,
    773         left_indexer,
   (...)
    778         use_na_proxy=True,
    779     )

File ~/.local/lib/python3.8/site-packages/pandas/core/reshape/merge.py:2604, in _items_overlap_with_suffix(left, right, suffixes)
   2601 lsuffix, rsuffix = suffixes
   2603 if not lsuffix and not rsuffix:
-> 2604     raise ValueError(f"columns overlap but no suffix specified: {to_rename}")
   2606 def renamer(x, suffix):
   2607     """
   2608     Rename the left and right indices.
   2609 
   (...)
   2620     x : renamed column name
   2621     """

ValueError: columns overlap but no suffix specified: Index(['moran_local_q'], dtype='object')
In [ ]:
Mel_uTARs_f.obs['Coexpressed_spots'] = Mel_uTARs_f.obs['moran_local_q'].apply(lambda x: 10 if x == "1" else 0)
sc.pl.spatial(
    Mel_uTARs_f,
    color='Coexpressed_spots',  # Column with pre-defined colors
    size=2,  cmap='Reds'                   # Size of spots
)
In [28]:
Mel_uTARs_f.obs['Coexpressed_spots'] = Mel_uTARs_f.obs['moran_local_q'].apply(lambda x: 10 if x == "1" else 0)
Mel_uTARs_f.obs['Coexpressed_spots'].value_counts()
Out[28]:
Coexpressed_spots
0    98086
Name: count, dtype: int64
In [18]:
import pandas as pd

# Convert moran_loc_bv.q to a DataFrame
moran_loc_bv_q_df = pd.DataFrame(moran_loc_bv.q, columns=['moran_local_q'], index=Mel_uTARs_f.obs.index)

# Check for column overlap in Mel_uTARs_f.obs
existing_columns = Mel_uTARs_f.obs.columns
if 'moran_local_q' in existing_columns:
    # Rename the column in moran_loc_bv_q_df to avoid overlap
    moran_loc_bv_q_df.rename(columns={'moran_local_q': 'moran_local_q_new'}, inplace=True)

# Add the new data to adata.obs
Mel_uTARs_f.obs = Mel_uTARs_f.obs.join(moran_loc_bv_q_df)

# Verify the addition
print(Mel_uTARs_f.obs.head())
                imagecol  imagerow  moran_local_q
GCCCCGAGTATCCG   5419.20   6997.75              3
GTGCGGATGGGTTC   5495.50   4708.40              3
CTCCTGGAGTGGCA   2456.50   4922.75              3
CAGTATGGCTGGCT   5641.55   6668.85              3
TGGCGGCTGAACCT   2643.40   4296.00              3
In [ ]:
 
In [41]:
Mel_uTARs_f.obs['moran_local_q'].unique()
Mel_uTARs_f.obs['moran_local_q_new'].unique()
Out[41]:
array([3, 2, 4, 1])
In [43]:
Mel_uTARs_f.obs['moran_local_q_new'].value_counts()
Out[43]:
moran_local_q
3    92372
2     5690
4       23
1        1
Name: count, dtype: int64
In [89]:
import scanpy as sc
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

# Ensure 'moran_local_q_new' is categorical
Mel_uTARs_f.obs['moran_local_q_new'] = Mel_uTARs_f.obs['moran_local_q_new'].astype('category')

# Define the colors for the categories
colors = ['red', 'green', 'orange', 'blue']
categories = Mel_uTARs_f.obs['moran_local_q_new'].cat.categories

# Create a colormap and norm
cmap = mcolors.ListedColormap(colors)
norm = mcolors.BoundaryNorm(boundaries=range(len(categories)+1), ncolors=len(categories))

# Plot the spatial data
sc.pl.spatial(
    Mel_uTARs_f,
    alpha_img=0.6,
    color='moran_local_q_new',
    spot_size=100,
    cmap=cmap,
    norm=norm,
    vmax=len(categories)-1
)
No description has been provided for this image
In [91]:
Co_exp = Mel_uTARs_f[Mel_uTARs_f.obs['moran_local_q_new'] == 1].copy()
sc.pl.spatial(
    Co_exp,
    alpha_img=0.6,
    color='moran_local_q_new',
    spot_size=200)
No description has been provided for this image
In [117]:
# Define the color map
colors = {
    '1': 'red',
    '2': 'lightgrey',
    '3': 'lightgrey',
    '4': 'lightgrey'    
}

# Ensure 'moran_local_q_new' is in string format if necessary
Mel_uTARs_f.obs['moran_local_q_new'] = Mel_uTARs_f.obs['moran_local_q_new'].astype(str)

# Map colors correctly
Mel_uTARs_f.obs['moran_local_q_color'] = Mel_uTARs_f.obs['moran_local_q_new'].map(colors)

# Verify the result
print(Mel_uTARs_f.obs[['moran_local_q_new', 'moran_local_q_color']].head())
               moran_local_q_new moran_local_q_color
GCCCCGAGTATCCG                 2           lightgrey
GATAACGGCCGTCC                 2           lightgrey
GGTACCTCGTTGGA                 3           lightgrey
AAACTCACATTCTC                 2           lightgrey
CACGGATGGACAGT                 3           lightgrey
In [122]:
# Map the colors to the combined clusters
#Mel_uTARs_f.obs['moran_local_q_color'] = Mel_uTARs_f.obs['moran_local_q_new'].astype(str).map(colors)

# Plot using sc.pl.spatial with the custom color column
sc.pl.spatial(
    Mel_uTARs_f,
    color='moran_local_q_new',  # Column with pre-defined colors
    size=5,                     # Size of spots
)
No description has been provided for this image
In [67]:
Mel_uTARs_f.obs['Coexpressed_spots'] = Mel_uTARs_f.obs['moran_local_q'].apply(lambda x: 10 if x == "1" else 0)
sc.pl.spatial(
    Mel_uTARs_f,
    color='Coexpressed_spots',  # Column with pre-defined colors
    size=5,  cmap='Reds' , vmax=1                 # Size of spots
)
No description has been provided for this image
In [127]:
Mel_uTARs_f.obs['moran_local_q_reversed'].value_counts()
Out[127]:
moran_local_q_reversed
0    10319
Name: count, dtype: int64
In [135]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd
from splot.esda import moran_scatterplot, lisa_cluster
from libpysal.weights import Queen
from esda.moran import Moran, Moran_Local

# Convert the AnnData object to DataFrame
epr_df = Mel_uTARs_f.to_df()

# Define gene names
gene1_name = "BIRC5"
gene2_name = "chr5_11929999_11935299_+_2148_0"

# Add gene expression data to obsm
Mel_uTARs_f.obsm["gpd"][gene1_name] = epr_df[gene1_name]
Mel_uTARs_f.obsm["gpd"][gene2_name] = epr_df[gene2_name]

# Extract the geometry data
geo_df = Mel_uTARs_f.obsm["gpd"].copy()

# Ensure the geometry column is correctly formatted
if not isinstance(geo_df['geometry'].iloc[0], (Point,)):
    raise ValueError("Geometry column should contain Shapely Point objects. Please convert the data accordingly.")

# Create a GeoDataFrame
geo_df = gpd.GeoDataFrame(geo_df, geometry='geometry')

# Calculate spatial weights
w = Queen.from_dataframe(geo_df)

# Extract gene expression data
x = np.array(epr_df[gene1_name]).astype(float)
y = np.array(epr_df[gene2_name]).astype(float)

# Calculate Moran's Index
moran = Moran(y, w)
moran_loc_bv = Moran_Local(y, w)

# Plot Moran's scatterplot
fig, ax = plt.subplots(figsize=(4, 18))
moran_scatterplot(moran_loc_bv, p=0.05, ax=ax)
ax.set_xlabel(f'Gene {gene1_name}')
ax.set_ylabel(f'Gene {gene2_name}')
plt.tight_layout()
plt.show()

# Plot spatial correlation
fig, axs = plt.subplots(1, 1, figsize=(9, 9), subplot_kw={'adjustable': 'datalim'})
lisa_cluster(
    moran_loc_bv,
    geo_df,
    p=0.05,  # Adjust p-value threshold if needed
    markersize=5,  # Adjust markersize for better visibility
    alpha=0.7,  # Adjust alpha for better visibility
    ax=axs,
    legend_kwds={'loc': 'upper left', 'bbox_to_anchor': (0.92, 0.8)}
)
axs.set_title(f'{gene1_name} vs {gene2_name}', y=0.75)
axs.set_axis_off()
plt.show()
/scratch/temp/10858931/ipykernel_3186245/383420554.py:31: FutureWarning: `use_index` defaults to False but will default to True in future. Set True/False directly to control this behavior and silence this warning
  w = Queen.from_dataframe(geo_df)
No description has been provided for this image
No description has been provided for this image
In [137]:
print(f"Min: {moran_loc_bv.q.min()}, Max: {moran_loc_bv.q.max()}")
Min: 2, Max: 4
In [139]:
moran_loc_bv.q.shape
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[139], line 1
----> 1 moran_loc_bv.q.shape()

TypeError: 'tuple' object is not callable

PMEL¶

In [30]:
## CALC MORAN'S INDEX
epr_df = Mel_uTARs_f.to_df()

gene1_name="PMEL"
gene2_name="chr10_31969999_31970199_+_12945_0"

Mel_uTARs_f.obsm["gpd"][gene1_name] = epr_df[gene1_name]
#y = pd.DataFrame(y[:,:1])
Mel_uTARs_f.obsm["gpd"][gene2_name] = epr_df[gene2_name]
#w = Queen.from_dataframe(Mel_uTARs_f.obsm["gpd"])
w = Queen.from_dataframe(geo_df)
x=np.array(epr_df[gene1_name]).astype(int) #need to set type to int or float
y=np.array(epr_df[gene2_name]).astype(int)

#calculate moran index
moran = Moran(y,w)
moran_loc_bv = Moran_Local_BV(y, x, w)

fig, ax = plt.subplots(figsize=(4,18))
moran_scatterplot(moran_loc_bv, p=0.05, ax=ax)
ax.set_xlabel('Gene {}'.format(gene1_name))
ax.set_ylabel('Gene {}'.format(gene2_name))
plt.tight_layout()
plt.show()
/scratch/temp/10869136/ipykernel_1006190/1010272377.py:11: FutureWarning: `use_index` defaults to False but will default to True in future. Set True/False directly to control this behavior and silence this warning
  w = Queen.from_dataframe(geo_df)
No description has been provided for this image
In [36]:
import geopandas as gpd

import geopandas as gpd

# Convert the DataFrame to a GeoDataFrame using the existing 'geometry' column
Mel_uTARs_f_gdf = gpd.GeoDataFrame(Mel_uTARs_f.obsm["gpd"], geometry='geometry')


fig, axs = plt.subplots(1, 1, figsize=(5, 8), 
                        subplot_kw={'adjustable':'datalim'})
lisa_cluster(moran_loc_bv, 
             Mel_uTARs_f_gdf, 
             p=0.05,
             figsize=(9, 9),
             markersize=0.15,
             **{"alpha": 1},
             ax=axs,
             legend_kwds={'loc': 'upper left',
                          'bbox_to_anchor': (0.92, 0.8)})

axs.set_title(gene1_name + ' vs ' + gene2_name, y=0.75)
axs.set_axis_off()
plt.show()
No description has been provided for this image
In [ ]:
 
In [ ]: